#Correlations

pacman::p_load(tidyverse, corrr, knitr, kableExtra, RColorBrewer, psych)

ead <-  readRDS("Data/Regimedata/Measures_merged.rds")

names(ead)
opts <- options(knitr.kable.NA = "-")

table(ead$AnckarRegtype)

names(ead)

corr.ea <- ead%>%select(RegType_lied_EA, RegType_RoW_EA,  Politytype_Anocracy, status_fh_PF, HTW_RegType_MP_Autocracy, RegType_magaloni_EA, AnckarRegtype_MP_Autocracy)%>%
  rename(Polity = Politytype_Anocracy, FH = status_fh_PF, CPR = AnckarRegtype_MP_Autocracy, RoW = RegType_RoW_EA, ARD = HTW_RegType_MP_Autocracy, LIED = RegType_lied_EA, AoW = RegType_magaloni_EA)%>%
  correlate()


### Make Table 3


corr.ea
#corr.ea%>%rename(Typology = term)%>%knitr::kable( "latex", booktabs=T,  caption = "Correlations Intermediate Regimes", label = "tab:EA_Corr", align=c("l", rep('c',times=6)), digits = 3, linesep = c(""))%>%kable_styling(latex_options = "striped") 


# Get median and mean correlations as mentioned in text.
corr.ea %>% 
  pivot_longer(!1) %>%
  summarize(median = median(value, na.rm = T),
            mean = mean(value, na.rm = T))




### Make Table C1 (appendix)
corr.dem <- ead%>%select(RegType_lied_DEM, RegType_RoW_DEM,  Politytype_Democracy, status_fh_F, HTW_RegType_Democracy, RegType_magaloni_DEM, AnckarRegtype_Democracy)%>%
  rename(Polity = Politytype_Democracy, FH = status_fh_F, CPR = AnckarRegtype_Democracy, RoW = RegType_RoW_DEM, ARD = HTW_RegType_Democracy, LIED = RegType_lied_DEM, AoW = RegType_magaloni_DEM)%>%
  correlate()


corr.dem
#corr.dem%>%rename(Typology = term)%>%knitr::kable( "latex", booktabs=T,  caption = "Correlations Democracy", label = "tab:DEM_Corr", align=c("l", rep('c',times=6)), digits = 3, linesep = c(""))%>%kable_styling(latex_options = "striped")

# Get median and mean correlations as mentioned in text.
corr.dem %>% 
  pivot_longer(!1) %>%
  summarize(median = median(value, na.rm = T),
            mean = mean(value, na.rm = T))


## Make Figure C1 (appendix)

## Corr over time
start <- ead%>%filter(year == 1900)%>%select(RegType_lied_EA, RegType_RoW_EA,  Politytype_Anocracy, status_fh_PF, HTW_RegType_MP_Autocracy, RegType_magaloni_EA, AnckarRegtype_MP_Autocracy)%>%
  rename(Polity = Politytype_Anocracy, FH = status_fh_PF, CPR = AnckarRegtype_MP_Autocracy, RoW = RegType_RoW_EA, ARD = HTW_RegType_MP_Autocracy, LIED = RegType_lied_EA, AoW = RegType_magaloni_EA)%>%
  correlate()


start <- start%>%mutate(year = 1900)

for (i in 1901:2022){
  
  print(i)
  
  year <- ead%>%filter(year == i)%>%select(RegType_lied_EA, RegType_RoW_EA,  Politytype_Anocracy, status_fh_PF, HTW_RegType_MP_Autocracy, RegType_magaloni_EA, AnckarRegtype_MP_Autocracy)%>%
    rename(Polity = Politytype_Anocracy, FH = status_fh_PF, CPR = AnckarRegtype_MP_Autocracy, RoW = RegType_RoW_EA, ARD = HTW_RegType_MP_Autocracy, LIED = RegType_lied_EA, AoW = RegType_magaloni_EA)%>%
    correlate()
  year <- year%>%mutate(year = i)
  
  start <- bind_rows(start, year)
  
  rm(year)
}

check <- start %>%
  pivot_longer(cols= 2:8, names_to = "Correlation with", values_to = "Correlation")

check$term <- factor(check$term , levels=c("LIED",   "RoW",  "Polity", "FH" ,   "ARD", "AoW", "CPR"  ) )
check$`Correlation with` <- factor(check$`Correlation with` , levels=c("LIED",   "RoW",    "Polity", "FH" ,   "ARD", "AoW", "CPR"  ) )

check %>% ggplot(aes(x= year, y = Correlation, color = `Correlation with`, linetype = `Correlation with`))+
  geom_line(linewidth = 1) + theme_classic() + #scale_color_brewer(palette="Paired") +
  scale_x_continuous(breaks=seq(1900,2020, 15)) + geom_hline(yintercept=0) + facet_grid(vars(term),) +
  theme(text=element_text(size=20))  + theme(legend.position="bottom") + xlab( "") +ylab("Pearson's r") + scale_y_continuous(breaks = c(0,.5,1), limits = c(-0.15,1)) + scale_color_grey()








### Make Figure 1



names(ead)
countries <- ead %>% select(!RegType_miller_n) %>% pivot_longer(ends_with("_n"), names_to = "Indicator", values_to = "Regime_type") %>% select(Final_Code, Indicator, Regime_type, year)


countries$Indicator <- as.factor(countries$Indicator)


## If Figure is turns out different from paper, check if levels are ordered correctly. When preparing this replication data, the initial factor order sometimes switched for reasons I was not able to identify
levels(countries$Indicator) <- c("Classifying Political Regimes", "Freedom House", "Authoritarian Regimes Data Set", "Polity",  "Lexical Index of El. Dem.", "Autocracies of the World", "Regimes of the World")


countries <- countries %>% mutate (Regime_type_c = case_when( Regime_type == 0 ~ "Other Autocracy",
                                                              Regime_type == 1 ~ "Pseudo-Democratic Autocracy",
                                                              Regime_type == 2 ~ "Democracy"))

countries$Indicator <- factor(countries$Indicator,levels=c("Lexical Index of El. Dem.", "Regimes of the World", "Polity", "Freedom House", "Authoritarian Regimes Data Set", "Autocracies of the World", "Classifying Political Regimes"))


countries$Regime_type_c <- factor(countries$Regime_type_c,levels=c("Other Autocracy", "Pseudo-Democratic Autocracy", "Democracy"))

countries %>% filter(Final_Code %in% c(471, 520, 850, 663) & year >= 1960) %>% mutate(country = case_when( Final_Code == 471 ~ "Cameroon",
                                                                                                      Final_Code == 520 ~ "Somalia",
                                                                                                      Final_Code == 850 ~ "Indonesia",
                                                                                                      Final_Code == 663 ~ "Jordan")) %>%
  ggplot(aes(year, fct_rev(Indicator))) +  geom_tile(aes(fill = fct_rev(Regime_type_c)),colour = "white") +
  scale_fill_grey(na.translate = F) +  
  guides(fill=guide_legend(title="Regime Type")) +
  labs(x = "", y = "") + scale_x_continuous(limits = c(1960, 2020), breaks = seq(1960,2020,10)) + 
  theme_classic(base_size = 20) + facet_wrap(~country, ncol = 1) + theme(legend.position="top", text=element_text(size=20), axis.text.y = element_text(size = 12))
  



### Make Figure 4

sampres <- c("RegType_lied_EA", "RegType_RoW_EA",  "Politytype_Anocracy", "status_fh_PF", "HTW_RegType_MP_Autocracy", "RegType_magaloni_EA", "AnckarRegtype_MP_Autocracy")
comparison <- c("RegType_lied_n", "RegType_RoW_n", "Politytype_n", "fh_status_n", "RegType_magaloni_n", "HTW_RegType_n", "AnckarRegtype_n")

base <- tibble(Indicator = rep(sampres,7), Comparison = c(rep(comparison[1],7), rep(comparison[2],7), rep(comparison[3],7), rep(comparison[4],7), rep(comparison[5],7), rep(comparison[6],7), rep(comparison[7],7)), DEM = NA , CA = NA, EA = NA)




for (i in seq_along(sampres)){
  current_indicator <- sampres[i]
  for (j in seq_along(comparison)){
    
    print(paste("Indicator",sampres[i],"----", "Comparison", comparison[j]))  
    
    current_comparison <- comparison[j]
    
    print(table(base$DEM))
    
    
    
    dem_count <- ead%>%filter(!!sym(sampres[i]) == 1 & !!sym(comparison[j]) == 2) %>% tally() %>% pull()
    ea_count  <- ead%>%filter(!!sym(sampres[i]) == 1 & !!sym(comparison[j]) == 1) %>% tally() %>% pull()
    ca_count <- ead%>%filter(!!sym(sampres[i]) == 1 & !!sym(comparison[j]) == 0) %>% tally() %>% pull()
    
    
    base$DEM <- ifelse(base$Indicator == sampres[i] & base$Comparison == comparison[j], dem_count, base$DEM)
    base$EA <- ifelse(base$Indicator == sampres[i] & base$Comparison == comparison[j], ea_count, base$EA)
    base$CA <- ifelse(base$Indicator == sampres[i] & base$Comparison == comparison[j], ca_count, base$CA)
    
    
    
  }
  
}




base$Comparison <- as.factor(base$Comparison)

#levels(countries$Indicator) <- c()



levels(base$Comparison) <- c("CPR", "FH", "ARD", "Polity", "LIED", "AoW", "RoW")
base$Comparison <- factor(base$Comparison, levels=c("LIED", "RoW", "Polity", "FH", "ARD", "AoW", "CPR"))




base$Indicator <- as.factor(base$Indicator)
levels(base$Indicator) <- c("Classifying Political Regimes", "Authoritarian Regimes Data Set", "Polity", "Lexical Index of Electoral Democracy",  "Autocracies of the World", "Regimes of the World" , "Freedom House")

base$Indicator <- factor(base$Indicator, levels=c("Lexical Index of Electoral Democracy", "Regimes of the World", "Polity", "Freedom House", "Authoritarian Regimes Data Set", "Autocracies of the World", "Classifying Political Regimes"))


data <- base %>% pivot_longer(!1:2, names_to = "Regime_Type", values_to = "count")


names(data)


works <- data%>%group_by(Indicator, Comparison)%>%mutate(total = sum(count),
                                                         share = count/total)



works$Regime_Type <- as.factor(works$Regime_Type)
levels(works$Regime_Type) <- c("Other Autocracy", "Democracy", "Intermediate Regime")

works$Regime_Type  <- factor(works$Regime_Type, levels = c("Democracy","Intermediate Regime", "Other Autocracy"))




rect_data <- data.frame(
  xmin = c(.5,.5,2.5,2.5,4.5,4.5,4.5),  # start of rectangle in x-axis (adjust as needed)
  xmax = c(2.5,2.5,4.5,4.5,7.5,7.5,7.5),  # end of rectangle in x-axis (adjust as needed)
  ymin = c(0,0,0,0,0,0,0),            # start of rectangle in y-axis
  ymax = c(1,1,1,1,1,1,1),            # end of rectangle in y-axis (adjust as needed)
  Indicator = factor(c("Lexical Index of Electoral Democracy", "Regimes of the World", "Polity", "Freedom House", "Authoritarian Regimes Data Set", "Autocracies of the World", "Classifying Political Regimes"))  # use the facet names or panel names in your data
)


works%>%
  filter(Regime_Type != "Intermediate Regime") %>%
  mutate(text_col = if_else(Regime_Type == "Democracy", "1", "2")) %>%
  ggplot(aes(x=Comparison, y=share, fill = Regime_Type)) + 
  geom_bar(stat = "identity") +facet_wrap(~Indicator, ncol = 2) +
  theme_classic()  + ylab("") + xlab("") + scale_fill_grey() +
  theme(text=element_text(size=15)) +  labs(fill='Mismatch as coded as:') +  theme(legend.position = "bottom") +
  scale_y_continuous(limits = c(0,1), breaks = c(.25, .5, .75, 1), labels = function(y) paste0(y*100, "%")) +
  theme(panel.grid.major.y = element_line(color = "black",
                                         size = 0.5,
                                         linetype = 2)) +
  geom_rect(data = rect_data, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            color = "black", fill = NA, size = 1, inherit.aes = FALSE)  # Customize color, fill, and size as needed
## Make calculations for proportions mentioned in text

levels(works$Comparison) <- c("Lexical Index of Electoral Democracy", "Regimes of the World", "Polity", "Freedom House", "Authoritarian Regimes Data Set", "Autocracies of the World", "Classifying Political Regimes")




works2 <- works %>%
  filter(Regime_Type == "Intermediate Regime" & Indicator != Comparison)

works3 <- works %>%
  filter(Regime_Type == "Democracy" & Indicator != Comparison)
works4 <- works %>%
  filter(Regime_Type == "Other Autocracy" & Indicator != Comparison)

## Mean share of PDAs classified differently
summary(1 - works2$share)
## Mean share PDA classified as democracy
summary(works3$share)
## Mean share PDA classified as otjher autocracy

summary(works4$share)

## mean agreement for PDA categories
works2 %>%
  group_by(Indicator) %>%
  summarize(mean=mean(share))

works2 %>%
  group_by(Indicator) %>%
  summarize(mean=mean(share)) %>%
  summarize(mean = mean(mean))


### Make Figure D1

df1 <- ead%>%select(year, Politytype_n)%>%filter(!is.na(Politytype_n))
df1 <- df1%>%group_by(year, Politytype_n)%>%tally()
df1$Politytype_n <- factor(df1$Politytype_n , levels=c(2, 1, 0))
df1 <- df1  %>%
  group_by(year, Politytype_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df1 <- df1%>%mutate(Indicator = "Polity")%>%rename(Regime_Type = Politytype_n)%>%filter(year %in%1800:2018)



df2 <- ead%>%select(year, fh_status_n)%>%filter(!is.na(fh_status_n))
df2 <- df2%>%group_by(year, fh_status_n)%>%tally()
df2$fh_status_n <- factor(df2$fh_status_n , levels=c(2, 1, 0) )
df2 <- df2  %>%
  group_by(year, fh_status_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df2 <- df2%>%mutate(Indicator = "FH")%>%rename(Regime_Type = fh_status_n)


df3 <- ead%>%select(year, RegType_RoW_n)%>%filter(!is.na(RegType_RoW_n))
df3 <- df3%>%group_by(year, RegType_RoW_n)%>%tally()
df3$RegType_RoW_n <- factor(df3$RegType_RoW_n , levels=c(2, 1, 0) )
df3 <- df3  %>%
  group_by(year, RegType_RoW_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df3 <- df3%>%mutate(Indicator = "RoW")%>%rename(Regime_Type = RegType_RoW_n)


df4 <- ead%>%select(year, RegType_lied_n)%>%filter(!is.na(RegType_lied_n))
df4 <- df4%>%group_by(year, RegType_lied_n)%>%tally()
df4$RegType_lied_n <- factor(df4$RegType_lied_n , levels=c(2, 1, 0) )
df4 <- df4  %>%
  group_by(year, RegType_lied_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df4 <- df4%>%mutate(Indicator = "LIED")%>%rename(Regime_Type = RegType_lied_n)


df5 <- ead%>%select(year, HTW_RegType_n)%>%filter(!is.na(HTW_RegType_n))
df5 <- df5%>%group_by(year, HTW_RegType_n)%>%tally()
df5$HTW_RegType_n <- factor(df5$HTW_RegType_n , levels=c(2, 1, 0) )
df5 <- df5  %>%
  group_by(year, HTW_RegType_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df5 <- df5%>%mutate(Indicator = "ARD")%>%rename(Regime_Type = HTW_RegType_n)


df6 <- ead%>%select(year, AnckarRegtype_n)%>%filter(!is.na(AnckarRegtype_n))
df6 <- df6%>%group_by(year, AnckarRegtype_n)%>%tally()
df6$AnckarRegtype_n <- factor(df6$AnckarRegtype_n , levels=c(2, 1, 0) )
df6 <- df6  %>%
  group_by(year, AnckarRegtype_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df6 <- df6%>%mutate(Indicator = "CPR")%>%rename(Regime_Type = AnckarRegtype_n)




df7 <- ead%>%select(year, RegType_magaloni_n)%>%filter(!is.na(RegType_magaloni_n))
df7 <- df7%>%group_by(year, RegType_magaloni_n)%>%tally()
df7$RegType_magaloni_n <- factor(df7$RegType_magaloni_n , levels=c(2, 1, 0) )
df7 <- df7  %>%
  group_by(year, RegType_magaloni_n) %>%
  summarise(n = sum(n)) %>%
  mutate(percentage = n / sum(n))
df7 <- df7%>%mutate(Indicator = "AoW")%>%rename(Regime_Type = RegType_magaloni_n)






df <- bind_rows(df4, df3, df1, df2, df5, df7, df6)








df$Indicator <- factor(df$Indicator , levels=c("LIED", "RoW", "Polity", "FH", "ARD", "AoW", "CPR") )

unique(df$Indicator)
df <- df%>%mutate(Reg_Char = case_when(Regime_Type == 2 ~ "Democracy",
                                       Regime_Type == 1 ~ "Pseudo-Democratic Autocracy",
                                       Regime_Type == 0 ~ "Other Autocracy"))

df$Reg_Char <- factor(df$Reg_Char , levels=c("Democracy", "Pseudo-Democratic Autocracy", "Other Autocracy") )



ggplot(df%>%filter(year >= 1945), aes(x=year, y=percentage, fill=Reg_Char)) + 
  geom_area(alpha=0.6 , size=1, colour="black") + theme_classic() + facet_wrap(~Indicator, ncol = 2) +
  scale_fill_grey() + labs(y="", x = "", title = "") +  theme(text=element_text(size=15)) +
  scale_x_continuous(breaks=seq(1950,2020, 10)) + labs(fill='Regime Type') +   theme(legend.position = "bottom") + 
  scale_y_continuous(limits = c(-.0001,1.0001), breaks = c(0,.5,1), labels = function(y) paste0(y*100, "%")) 


